%%html
<link rel="stylesheet" type="text/css" href="rise.css" />
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'
plt.rcParams["axes.labelsize"] = 16
plt.rcParams["axes.titlesize"] = 18
plt.rcParams["legend.fontsize"] = 14
Principal Component Analysis (PCA) with EEG waveformsΒΆ
Learning goals
- You will be able to apply PCA to time series.
- You will be able to classify time series and visualize the classification in a low number of PCs.
- You will be able to visualize classified time series in higher than three dimensions.
- You will appreciate how classification of time series waveforms could be beneficial for interpreting experimental data
EEG recordings
EEGs = np.load("data/EEGs.npy")
# [channel, time, trial]
EEGs.shape
(64, 640, 99)
# Visualize EEGs from a few channels and trials
fig = plt.figure(figsize=[8,8])
ax = plt.axes(projection='3d')
cmap = mpl.colormaps['tab10'].resampled(10).colors
for channel in range(5):
for trial in range(3):
ax.plot(np.arange(640), np.ones(640) * trial, EEGs[channel,:,trial] - 50*channel, color=cmap[trial%10,:])
ax.set_xlabel('axis 1 = Time Pt')
ax.set_ylabel('axis 2 = Trial')
ax.set_zlabel('axis 0 = Channel')
ax.set_title('A few EEGs colored by trial')
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
ax.view_init(-155, -110)
ax.set_box_aspect(None, zoom=0.9)
plt.tight_layout();
# Visualize all EEGs
fig = plt.figure(figsize=[8,8])
ax = plt.axes(projection='3d')
for channel in range(64):
for trial in range(99):
plt.plot(np.arange(640), np.ones(640) * trial, EEGs[channel,:,trial] - 50*channel, color=cmap[trial%10,:])
ax.set_xlabel('Time Pt 0-639')
ax.set_ylabel('Trial 0-98')
ax.set_zlabel('Channel 0-63')
ax.set_title('All EEGs')
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
ax.view_init(-155, -110)
ax.dist = 10
plt.tight_layout();
Get all EEGs across trials for channel 23.
EEG_23_all = EEGs[23,:,:]
EEG_23_all.shape
(640, 99)
# Visualize subset of EEGs from channel 23
fig = plt.figure(figsize=[8,8])
ax = plt.axes(projection='3d')
for channel in range(64):
for trial in range(99):
plt.plot(np.arange(640), np.ones(640) * trial, EEGs[channel,:,trial] - 50*channel, color=cmap[trial%10,:])
ax.set_xlabel('Time Pt 0-639')
ax.set_ylabel('Trial 0-98')
ax.set_zlabel('Channel 0-63')
ax.set_title('Subset of EEGs for channel 23')
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
ax.view_init(-155, -110)
ax.set_box_aspect(None, zoom=0.9)
xx, yy = np.meshgrid(range(640), range(99))
zz = xx.copy()
zz[:] = EEG_23_all.mean() - 50*23
ax.plot_surface(xx, yy, zz, alpha=0.5)
plt.tight_layout();
# average across trials
avgEEGs = EEGs.mean(axis=2)
avgEEGs.shape
(64, 640)
plt.rcParams['figure.figsize'] = [15,5]
plt.plot(avgEEGs[23], label='channel 23')
plt.plot(avgEEGs[40], label='channel 40')
plt.xlabel('Time pt')
plt.ylabel(r'EEG ($\mu$V)')
plt.title('EEG Trial averages')
plt.legend();
Each EEG is 640 time points --> a 640-dimensional vector
We could plot each EEG as a point in a 640-dimensional space
Question?ΒΆ
Should we standardize the EEG data before performing PCA? Why or why not?
Note: Since each time point is a dimension, this would amount to normalizing the variance across EEGs at each time point.
Can we reasonably explain each 640 sample length EEG with ONLY 3 values instead of 640!?
from sklearn.decomposition import PCA
pca = PCA(3)
# rows are samples (EEGs), columns are features/dimensions (time pts)
pca.fit(avgEEGs)
PCA(n_components=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA(n_components=3)
Done! But how do we interpret this?
The three principal component axes are each a direction in the 640-dimensional coordinate system for the EEGs.
pca.components_.shape
(3, 640)
The 640-dimensional coordinate system for the EEGs represents the observed EEG amplitude ($\mu V$) at each of the 640 time points.
The direction of each principal component defines a particular set of relative amplitudes at all 640 time points.
Thus, each principal component defines a particular EEG waveform shape.
for i in range(3):
plt.plot(pca.components_[i], label=f'PC{i}')
plt.xlabel('Time pt')
plt.ylabel(r'EEG ($\mu$V)')
plt.title('Principal Components can themselves be interpreted as EEG waveforms')
plt.legend();
So when we describe each 640 sample length EEG with only 3 values, what we are actually doing is describing each EEG as a mixture of the 3 principal component EEG shapes.
avgEEGs_pc = pca.transform(avgEEGs)
avgEEGs_pc.shape
(64, 3)
avgEEGs_pc[:5,:]
array([[-50.390007, 5.429818, 9.518041],
[-44.33464 , 12.796195, 6.094212],
[-39.27459 , -9.802836, 2.435461],
[ -8.72372 , 32.096066, -19.62328 ],
[-27.016552, 8.502509, -5.048854]], dtype=float32)
Let's see how well a mixture of the 3 principal components can describe the average EEG in channel 0.
avgEEG0 = avgEEGs[0]
# PCA components are relative to the mean across the EEG dataset
avgEEG0_projected = pca.mean_.copy() # !!! <-- If we don't copy this we'll end up changing pca.mean_
for i in range(3):
avgEEG0_projected += (avgEEGs_pc[0,i] * pca.components_[i])
plt.plot(avgEEG0, label="Original")
plt.plot(avgEEG0_projected, label="Projected onto 3 PCs")
plt.xlabel('Time pt')
plt.ylabel(r'EEG ($\mu$V)')
plt.title('Average EEG in Channel 0');
plt.legend();
pca.inverse_transform(avgEEGs_pc) does this mixing for us for the entire dataset...
avgEEGs_projected = pca.inverse_transform(avgEEGs_pc)
avgEEGs_projected.shape
(64, 640)
Let's see how good it did for channel 9...
plt.plot(avgEEGs[9], label="Original")
plt.plot(avgEEGs_projected[9], label="Projected onto 3 PCs")
plt.xlabel('Time pt')
plt.ylabel(r'EEG ($\mu$V)')
plt.title('Average EEG in Channel 9');
plt.legend();
The PCA projected EEGs look like they are a pretty good representation of the original EEGs, but there are some differences due to the fact that we threw out the variance along 637 dimensions!
How much information did we lose? Or conversely, how much of the variance in the data do we still explain after projecting each EEG onto only 3 dimensions?
plt.figure(figsize=[5,5])
n = np.arange(1, 4)
plt.plot(n, np.cumsum(pca.explained_variance_ratio_), 'o-')
plt.xticks(n)
plt.xlabel('# PCs')
plt.ylabel('Explained Variance');
Wow, ~90% of variance explained using ONLY 3 PCs!
Let's plot all of our 64 EEG waveforms in the projected 3-dimensional PCA space.
fig = plt.figure(figsize=[6,6])
ax = fig.add_subplot(111, projection='3d')
ax.scatter(avgEEGs_pc[:,0], avgEEGs_pc[:,1], avgEEGs_pc[:,2])
ax.set_xlabel('PC0')
ax.set_ylabel('PC1')
ax.set_zlabel('PC2')
ax.set_box_aspect(None, zoom=0.8)
fig.tight_layout();
Let's try clustering these recordings into two groups using $k$-means...
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2)
labels = kmeans.fit_predict(avgEEGs_pc)
fig = plt.figure(figsize=[6,6])
ax = fig.add_subplot(111, projection='3d')
ax.scatter(avgEEGs_pc[:,0], avgEEGs_pc[:,1], avgEEGs_pc[:,2], c=labels, cmap='coolwarm')
ax.set_xlabel('PC0')
ax.set_ylabel('PC1')
ax.set_zlabel('PC2')
ax.set_box_aspect(None, zoom=0.8)
fig.tight_layout();
Do waveforms in each cluster have distinct shapes?
[fig, ax] = plt.subplots(2, 1)
for channel in range(64):
cluster = labels[channel]
ax[cluster].plot(avgEEGs_projected[channel,:])
ax[0].set_ylabel(r'EEG ($\mu$V)')
ax[0].set_title('Cluster 0')
ax[1].set_xlabel('Time pt')
ax[1].set_ylabel(r'EEG ($\mu$V)')
ax[1].set_title('Cluster 1')
plt.tight_layout();
Let's try clustering with a GMM and using BIC or silhouette score to determine the optimal number of clusters...
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score
X = avgEEGs_pc
n_components = np.arange(1, 9)
models = [GaussianMixture(n_components=n, random_state=0) for n in n_components]
for model in models:
model.fit(X)
bic = [model.bic(X) for model in models]
silhouette = [silhouette_score(X, model.predict(X)) for model in models[1:]]
plt.subplot(1, 2, 1)
plt.plot(n_components, bic, 'o-')
plt.xlabel('# Clusters');
plt.ylabel('Bayesian Information Criteria (BIC)')
plt.subplot(1, 2, 2)
plt.plot(n_components[1:], silhouette, 'o-')
plt.xlabel('# Clusters');
plt.ylabel('Silhouette Score');
Cluster EEGs using a GMM with 2 components.
model = GaussianMixture(n_components=2)
labels = model.fit_predict(avgEEGs_pc)
fig = plt.figure(figsize=[6,6])
ax = fig.add_subplot(111, projection='3d')
ax.scatter(avgEEGs_pc[:,0], avgEEGs_pc[:,1], avgEEGs_pc[:,2], c=labels, cmap='jet')
ax.set_xlabel('PC0')
ax.set_ylabel('PC1')
ax.set_zlabel('PC2')
ax.set_box_aspect(None, zoom=0.8)
fig.tight_layout();
Visualize waveforms in each cluster.
[fig, ax] = plt.subplots(2, 1)
for channel in range(64):
cluster = labels[channel]
ax[cluster].plot(avgEEGs_projected[channel,:])
for i in range(2):
ax[i].set_ylabel(i)
ax[-1].set_xlabel('Time (ms)')
plt.tight_layout();
ExerciseΒΆ
Try clustering again with 3 components using a GMM.
Plot the clusters on the first three principal component axes.